library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.1 v purrr 0.3.4
## v tibble 3.0.0 v dplyr 1.0.0
## v tidyr 1.1.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(ggplot2)
covid <- readRDS("data/covid_north_complete.RData")
fit = lm(new_cases_per_million~new_tests_per_million+as.factor(continent),data = covid)
summary(fit)
##
## Call:
## lm(formula = new_cases_per_million ~ new_tests_per_million +
## as.factor(continent), data = covid)
##
## Residuals:
## Min 1Q Median 3Q Max
## -587.04 -55.82 -16.14 12.19 1364.72
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.1612093 4.0415175 4.246 2.21e-05 ***
## new_tests_per_million 0.0201693 0.0007605 26.522 < 2e-16 ***
## as.factor(continent)Asia -1.6806933 5.4900207 -0.306 0.76
## as.factor(continent)Europe 95.7821128 6.7401297 14.211 < 2e-16 ***
## as.factor(continent)North America 79.4825661 5.3860577 14.757 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 134.3 on 5482 degrees of freedom
## Multiple R-squared: 0.3322, Adjusted R-squared: 0.3317
## F-statistic: 681.8 on 4 and 5482 DF, p-value: < 2.2e-16
#Taking out new tests rate effect on new cases rate
residuals = residuals(fit)
new_covid <-
covid %>% mutate("residuals" = residuals)
new_covid[2] = as.Date(new_covid$Day)
covid_usa <- new_covid %>% filter(location == "United States")
covid_canada <- new_covid %>% filter(location == "Canada")
covid_cote <- new_covid %>% filter(location == "Cote d'Ivoire")
covid_ghana <- new_covid %>% filter(location == "Ghana")
covid_guatemala <- new_covid %>% filter(location == "Guatemala")
covid_india <- new_covid %>% filter(location == "India")
covid_indonesia <- new_covid %>% filter(location == "Indonesia")
covid_italy <- new_covid %>% filter(location == "Italy")
covid_japan <- new_covid %>% filter(location == "Japan")
covid_mexico <- new_covid %>% filter(location == "Mexico")
covid_morocco <- new_covid %>% filter(location == "Morocco")
covid_portugal <- new_covid %>% filter(location == "Portugal")
covid_russia <- new_covid %>% filter(location == "Russia")
covid_saudi <- new_covid %>% filter(location == "Saudi Arabia")
covid_south_africa <- new_covid %>% filter(location == "South Africa")
covid_uk <- new_covid %>% filter(location == "United Kingdom")
new_covid_usa <-
covid_usa %>% mutate("residuals_diff" = residuals)
#deseasonalized
new_covid_usa$residuals_diff[8:409] = new_covid_usa$residuals_diff %>% diff(7)
plot.ts(new_covid_usa$residuals_diff)

# ARIMA as a special case of the ARIMAX intercept
regression_model = lm(residuals_diff~1, data = new_covid_usa[8:379,])
summary(regression_model)
##
## Call:
## lm(formula = residuals_diff ~ 1, data = new_covid_usa[8:379,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -521.76 -29.09 -3.84 29.91 514.68
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.353 4.305 0.779 0.437
##
## Residual standard error: 83.03 on 371 degrees of freedom
anova(regression_model)
## Analysis of Variance Table
##
## Response: residuals_diff
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 371 2557845 6894.5
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error,d=1)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(0,1,2)
##
## Coefficients:
## ma1 ma2
## -1.0055 0.3212
## s.e. 0.0504 0.0613
##
## sigma^2 estimated as 5462: log likelihood=-2122.31
## AIC=4250.62 AICc=4250.68 BIC=4262.37
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.2448158 73.6088 38.59669 3904.676 4149.71 0.7866605 0.003024844
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,2)
## Q* = 54.879, df = 8, p-value = 4.661e-09
##
## Model df: 2. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_covid_usa[380:409,])
error_forecast = predict(error_model, n.ahead = 30)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_covid_usa$residuals_diff[8:409])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_covid_usa[8:409,]) +
geom_line(aes(x=Day, y=residuals_diff, color = "black")) +
geom_line(aes(x=Day, y=compare_table$predict, color = "red"))+
labs(x = "Date",
y = "USA Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Null Model, ARIMA(0,1,2)")

plot(new_covid_usa$Day[380:409], # Draw first time series
new_covid_usa$residuals_diff[380:409],
type = "l",
xlab = "Date",
ylab = "USA Prediction vs. Acutal: Null Model")
lines(new_covid_usa$Day[380:409], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE USA Null
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 5418.256
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 2231.175
regression_model = lm(residuals_diff~as.factor(vaccination_policy), data = new_covid_usa[8:379,])
summary(regression_model)
##
## Call:
## lm(formula = residuals_diff ~ as.factor(vaccination_policy),
## data = new_covid_usa[8:379, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -526.74 -28.64 -9.58 24.70 509.70
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.841 8.701 4.579 6.42e-06 ***
## as.factor(vaccination_policy)1 -78.497 12.630 -6.215 1.40e-09 ***
## as.factor(vaccination_policy)2 -41.438 16.686 -2.483 0.01346 *
## as.factor(vaccination_policy)3 -38.953 19.747 -1.973 0.04929 *
## as.factor(vaccination_policy)4 -61.935 21.645 -2.861 0.00446 **
## as.factor(vaccination_policy)5 -31.508 10.884 -2.895 0.00402 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 79.27 on 366 degrees of freedom
## Multiple R-squared: 0.1008, Adjusted R-squared: 0.08848
## F-statistic: 8.203 on 5 and 366 DF, p-value: 2.34e-07
anova(regression_model)
## Analysis of Variance Table
##
## Response: residuals_diff
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(vaccination_policy) 5 257747 51549 8.2027 2.34e-07 ***
## Residuals 366 2300098 6284
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error,d=1)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(0,1,2)
##
## Coefficients:
## ma1 ma2
## -0.9852 0.2860
## s.e. 0.0489 0.0637
##
## sigma^2 estimated as 5544: log likelihood=-2125.04
## AIC=4256.08 AICc=4256.14 BIC=4267.83
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.004498275 74.15535 39.19593 51.63764 240.5241 0.7914197
## ACF1
## Training set -0.006956154
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,2)
## Q* = 58.988, df = 8, p-value = 7.358e-10
##
## Model df: 2. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_covid_usa[380:409,])
error_forecast = predict(error_model, n.ahead = 30)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_covid_usa$residuals_diff[8:409])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_covid_usa[8:409,]) +
geom_line(aes(x=Day, y=residuals_diff, color = "black")) +
geom_line(aes(x=Day, y=compare_table$predict, color = "red"))+
labs(x = "Date",
y = "USA Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Full Model, ARIMA(0,1,2)")

plot(new_covid_usa$Day[380:409], # Draw first time series
new_covid_usa$residuals_diff[380:409],
type = "l",
xlab = "Date",
ylab = "USA Prediction vs. Acutal: Full Model")
lines(new_covid_usa$Day[380:409], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE USA Full
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 5499.016
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 2297.392
new_covid_canada <-
covid_canada %>% mutate("residuals_diff" = residuals)
#deseasonalized
new_covid_canada$residuals_diff[8:409] = new_covid_canada$residuals_diff %>% diff(7)
plot.ts(new_covid_canada$residuals_diff)

# ARIMA as a special case of the ARIMAX intercept
regression_model = lm(residuals_diff~1, data = new_covid_canada[8:379,])
summary(regression_model)
##
## Call:
## lm(formula = residuals_diff ~ 1, data = new_covid_canada[8:379,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -163.827 -17.818 -0.067 11.820 211.984
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.355 1.908 0.71 0.478
##
## Residual standard error: 36.81 on 371 degrees of freedom
anova(regression_model)
## Analysis of Variance Table
##
## Response: residuals_diff
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 371 502636 1354.8
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error,d=1)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(2,1,1)
##
## Coefficients:
## ar1 ar2 ma1
## -0.2607 -0.2065 -0.7614
## s.e. 0.0685 0.0638 0.0547
##
## sigma^2 estimated as 1218: log likelihood=-1843.73
## AIC=3695.46 AICc=3695.57 BIC=3711.13
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.09491957 34.7177 20.38946 -957.0729 1330.579 0.7270513
## ACF1
## Training set -0.005374141
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,1)
## Q* = 72.669, df = 7, p-value = 4.26e-13
##
## Model df: 3. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_covid_canada[380:409,])
error_forecast = predict(error_model, n.ahead = 30)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_covid_canada$residuals_diff[8:409])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_covid_canada[8:409,]) +
geom_line(aes(x=Day, y=residuals_diff, color = "black")) +
geom_line(aes(x=Day, y=compare_table$predict, color = "red"))+
labs(x = "Date",
y = "Canada Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Null Model, ARIMA(2,1,1)")

plot(new_covid_canada$Day[380:409], # Draw first time series
new_covid_canada$residuals_diff[380:409],
type = "l",
xlab = "Date",
ylab = "Canada Prediction vs. Acutal: Null Model")
lines(new_covid_canada$Day[380:409], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Canada Null
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 1205.318
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 3531.228
# ARIMA as a special case of the ARIMAX intercept
regression_model = lm(residuals_diff~as.factor(vaccination_policy), data = new_covid_canada[8:379,])
summary(regression_model)
##
## Call:
## lm(formula = residuals_diff ~ as.factor(vaccination_policy),
## data = new_covid_canada[8:379, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -164.821 -15.648 -1.514 10.815 210.990
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.315 3.958 2.606 0.00952 **
## as.factor(vaccination_policy)2 -11.258 7.196 -1.565 0.11855
## as.factor(vaccination_policy)3 -10.727 5.175 -2.073 0.03887 *
## as.factor(vaccination_policy)4 -40.713 9.375 -4.343 1.82e-05 ***
## as.factor(vaccination_policy)5 -7.966 5.166 -1.542 0.12392
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 36.06 on 367 degrees of freedom
## Multiple R-squared: 0.0507, Adjusted R-squared: 0.04036
## F-statistic: 4.9 on 4 and 367 DF, p-value: 0.0007352
anova(regression_model)
## Analysis of Variance Table
##
## Response: residuals_diff
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(vaccination_policy) 4 25484 6371.1 4.9003 0.0007352 ***
## Residuals 367 477152 1300.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error,d=1)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(2,1,1)
##
## Coefficients:
## ar1 ar2 ma1
## -0.2477 -0.1958 -0.7628
## s.e. 0.0720 0.0664 0.0600
##
## sigma^2 estimated as 1230: log likelihood=-1845.44
## AIC=3698.89 AICc=3699 BIC=3714.55
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.03584671 34.87973 20.51357 26.17381 224.0617 0.7366782
## ACF1
## Training set -0.001888925
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,1)
## Q* = 73.989, df = 7, p-value = 2.3e-13
##
## Model df: 3. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_covid_canada[380:409,])
error_forecast = predict(error_model, n.ahead = 30)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_covid_canada$residuals_diff[8:409])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_covid_canada[8:409,]) +
geom_line(aes(x=Day, y=residuals_diff, color = "black")) +
geom_line(aes(x=Day, y=compare_table$predict, color = "red"))+
labs(x = "Date",
y = "Canada Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Full Model, ARIMA(2,1,1)")

plot(new_covid_canada$Day[380:409], # Draw first time series
new_covid_canada$residuals_diff[380:409],
type = "l",
xlab = "Date",
ylab = "Canada Prediction vs. Acutal: Full Model")
lines(new_covid_canada$Day[380:409], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Canada Full
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 1216.596
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 3529.257
#USA : Yt - Mt - St
ts_covid_usa = ts(covid_usa$new_cases_per_million, frequency = 7)
#decompose(ts_covid_usa)
new_ts_covid_usa = ts_covid_usa - decompose(ts_covid_usa)$seasonal - decompose(ts_covid_usa)$trend
plot.ts(new_ts_covid_usa)

new_covid_usa <-
new_covid_usa %>% mutate("new_ts_covid_usa" = new_ts_covid_usa) %>%
na.omit()
regression_model = lm(new_ts_covid_usa~1, data = new_covid_usa[1:373,])
summary(regression_model)
##
## Call:
## lm(formula = new_ts_covid_usa ~ 1, data = new_covid_usa[1:373,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -304.65 -28.81 -4.82 38.92 388.12
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.01713 4.26744 -0.004 0.997
##
## Residual standard error: 82.42 on 372 degrees of freedom
anova(regression_model)
## Analysis of Variance Table
##
## Response: new_ts_covid_usa
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 372 2526888 6792.7
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(5,0,3) with zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1 ma2 ma3
## -0.0456 -0.8651 -0.2450 -0.3038 -0.5619 -0.7014 0.5365 -0.6413
## s.e. 0.0536 0.0472 0.0651 0.0422 0.0466 0.0595 0.0508 0.0661
##
## sigma^2 estimated as 2326: log likelihood=-1973.94
## AIC=3965.89 AICc=3966.38 BIC=4001.18
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.04331 47.71312 31.00802 -49.67664 453.6815 0.4065845 -0.08202149
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(5,0,3) with zero mean
## Q* = 67.776, df = 3, p-value = 1.277e-14
##
## Model df: 8. Total lags used: 11
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_covid_usa[374:403,])
error_forecast = predict(error_model, n.ahead = 30)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_covid_usa$new_ts_covid_usa[1:403])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_covid_usa[1:403,]) +
geom_line(aes(x=Day, y=new_ts_covid_usa, color = "black")) +
geom_line(aes(x=Day, y=compare_table$predict, color = "red"))+
labs(x = "Date",
y = "USA Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Null Model, ARIMA(5,0,3)")

plot(covid_usa$Day[374:403], # Draw first time series
new_covid_usa$new_ts_covid_usa[374:403],
type = "l",
xlab = "Date",
ylab = "USA Prediction vs. Acutal: Null Model")
lines(covid_usa$Day[374:403], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE USA Null Method2
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 2276.541
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 4041.949
regression_model = lm(new_ts_covid_usa~as.factor(vaccination_policy), data = new_covid_usa[1:373,])
summary(regression_model)
##
## Call:
## lm(formula = new_ts_covid_usa ~ as.factor(vaccination_policy),
## data = new_covid_usa[1:373, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -303.60 -28.50 -4.74 38.76 388.46
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.6476 8.8956 0.073 0.942
## as.factor(vaccination_policy)1 -1.7141 13.0738 -0.131 0.896
## as.factor(vaccination_policy)2 1.5087 17.3555 0.087 0.931
## as.factor(vaccination_policy)3 -0.5040 20.5756 -0.024 0.980
## as.factor(vaccination_policy)4 -0.7638 22.5702 -0.034 0.973
## as.factor(vaccination_policy)5 -0.9990 11.2668 -0.089 0.929
##
## Residual standard error: 82.97 on 367 degrees of freedom
## Multiple R-squared: 0.0001125, Adjusted R-squared: -0.01351
## F-statistic: 0.008257 on 5 and 367 DF, p-value: 1
anova(regression_model)
## Analysis of Variance Table
##
## Response: new_ts_covid_usa
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(vaccination_policy) 5 284 56.8 0.0083 1
## Residuals 367 2526604 6884.5
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(2,0,2) with zero mean
##
## Coefficients:
## ar1 ar2 ma1 ma2
## 0.8430 -0.5297 -1.5722 0.6614
## s.e. 0.0632 0.0541 0.0508 0.0632
##
## sigma^2 estimated as 3624: log likelihood=-2057.01
## AIC=4124.02 AICc=4124.18 BIC=4143.63
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.8497896 59.87576 39.16667 191.5565 368.7459 0.5136889
## ACF1
## Training set -0.03077804
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,2) with zero mean
## Q* = 162.52, df = 6, p-value < 2.2e-16
##
## Model df: 4. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_covid_usa[374:403,])
error_forecast = predict(error_model, n.ahead = 30)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_covid_usa$new_ts_covid_usa[1:403])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_covid_usa[1:403,]) +
geom_line(aes(x=Day, y=new_ts_covid_usa, color = "black")) +
geom_line(aes(x=Day, y=compare_table$predict, color = "red"))+
labs(x = "Date",
y = "USA Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Full Model, ARIMA(2,0,2)")

plot(covid_usa$Day[374:403], # Draw first time series
new_covid_usa$new_ts_covid_usa[374:403],
type = "l",
xlab = "Date",
ylab = "USA Prediction vs. Acutal: Full Model")
lines(covid_usa$Day[374:403], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE USA Full Method 2
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 3585.106
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 7410.106
#Canada : Yt - Mt - St
ts_covid_canada = ts(covid_canada$new_cases_per_million, frequency = 7)
#decompose(ts_covid_canada)
new_ts_covid_canada = ts_covid_canada - decompose(ts_covid_canada)$seasonal - decompose(ts_covid_canada)$trend
plot.ts(new_ts_covid_canada)

new_covid_canada <-
new_covid_canada %>% mutate("new_ts_covid_canada" = new_ts_covid_canada) %>%
na.omit()
regression_model = lm(new_ts_covid_canada~1, data = new_covid_canada[1:373,])
summary(regression_model)
##
## Call:
## lm(formula = new_ts_covid_canada ~ 1, data = new_covid_canada[1:373,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -116.663 -8.996 -0.421 8.972 159.511
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.2513 1.2566 -0.2 0.842
##
## Residual standard error: 24.27 on 372 degrees of freedom
anova(regression_model)
## Analysis of Variance Table
##
## Response: new_ts_covid_canada
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 372 219109 589
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(5,0,1) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1 mean
## -0.0057 -0.4892 -0.3502 -0.2449 -0.2394 -0.7730 0.2427
## s.e. 0.0730 0.0590 0.0656 0.0591 0.0640 0.0604 0.0897
##
## sigma^2 estimated as 312.8: log likelihood=-1598.8
## AIC=3213.59 AICc=3213.99 BIC=3244.96
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01631147 17.51833 11.42657 -8.796033 310.0721 0.5043037
## ACF1
## Training set -0.01265951
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(5,0,1) with non-zero mean
## Q* = 7.2743, df = 3, p-value = 0.06365
##
## Model df: 7. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_covid_canada[374:403,])
error_forecast = predict(error_model, n.ahead = 30)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_covid_canada$new_ts_covid_canada[1:403])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_covid_canada[1:403,]) +
geom_line(aes(x=Day, y=new_ts_covid_canada, color = "black")) +
geom_line(aes(x=Day, y=compare_table$predict, color = "red"))+
labs(x = "Date",
y = "Canada Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Null Model, ARIMA(5,0,1)")

plot(new_covid_canada$Day[374:403], # Draw first time series
new_covid_canada$new_ts_covid_canada[374:403],
type = "l",
xlab = "Date",
ylab = "Canada Prediction vs. Acutal: Null Model")
lines(new_covid_canada$Day[374:403], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Canada Null Method 2
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 306.8921
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 1752.865
regression_model = lm(new_ts_covid_canada~as.factor(vaccination_policy), data = new_covid_canada[1:373,])
summary(regression_model)
##
## Call:
## lm(formula = new_ts_covid_canada ~ as.factor(vaccination_policy),
## data = new_covid_canada[1:373, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -116.340 -9.528 -0.078 9.255 160.184
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2814 2.6154 0.108 0.914
## as.factor(vaccination_policy)2 -0.8559 4.8344 -0.177 0.860
## as.factor(vaccination_policy)3 -0.1180 3.4536 -0.034 0.973
## as.factor(vaccination_policy)4 -0.8518 6.3169 -0.135 0.893
## as.factor(vaccination_policy)5 -1.2065 3.4663 -0.348 0.728
##
## Residual standard error: 24.4 on 368 degrees of freedom
## Multiple R-squared: 0.0004683, Adjusted R-squared: -0.0104
## F-statistic: 0.0431 on 4 and 368 DF, p-value: 0.9965
anova(regression_model)
## Analysis of Variance Table
##
## Response: new_ts_covid_canada
## Df Sum Sq Mean Sq F value Pr(>F)
## as.factor(vaccination_policy) 4 103 25.65 0.0431 0.9965
## Residuals 368 219006 595.12
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(5,0,2) with zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1 ma2
## 0.4319 -0.6236 -0.2139 -0.1885 -0.2052 -1.1729 0.4948
## s.e. 0.1398 0.0638 0.0853 0.0624 0.0721 0.1338 0.1166
##
## sigma^2 estimated as 322.3: log likelihood=-1604.25
## AIC=3224.5 AICc=3224.89 BIC=3255.87
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.25696 17.78253 11.55249 189.9477 556.9271 0.5099318 -0.009867794
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(5,0,2) with zero mean
## Q* = 3.473, df = 3, p-value = 0.3243
##
## Model df: 7. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_covid_canada[374:403,])
error_forecast = predict(error_model, n.ahead = 30)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_covid_canada$new_ts_covid_canada[1:403])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_covid_canada[1:403,]) +
geom_line(aes(x=Day, y=new_ts_covid_canada, color = "black")) +
geom_line(aes(x=Day, y=compare_table$predict, color = "red"))+
labs(x = "Date",
y = "Canada Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Full Model, ARIMA(5,0,2)")

plot(new_covid_canada$Day[374:403], # Draw first time series
new_covid_canada$new_ts_covid_canada[374:403],
type = "l",
xlab = "Date",
ylab = "Canada Prediction vs. Acutal: Full Model")
lines(new_covid_canada$Day[374:403], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Canada Full Method 2
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 316.2185
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 1772.211
#USA example aggregate weekly data
ts_covid_usa = ts(covid_usa$residuals,frequency = 7)
ts_covid_usa_2 = ts(colSums(matrix(ts_covid_usa, nrow=7)))
## Warning in matrix(ts_covid_usa, nrow = 7): data length [409] is not a sub-
## multiple or multiple of the number of rows [7]
vaccine_policy = colSums(matrix(as.numeric(covid_usa$vaccination_policy), nrow=7))
## Warning in matrix(as.numeric(covid_usa$vaccination_policy), nrow = 7): data
## length [409] is not a sub-multiple or multiple of the number of rows [7]
new_ts_covid_usa = data.frame(1:59,ts_covid_usa_2,vaccine_policy)
names(new_ts_covid_usa) = c("week","weekly_aggregated_residuals","vaccination_policy")
plot.ts(ts_covid_usa_2)

regression_model = lm(weekly_aggregated_residuals~1, data = new_ts_covid_usa[1:53,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ 1, data = new_ts_covid_usa[1:53,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1526.0 -993.1 -603.0 1075.4 3009.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 820.5 174.5 4.702 1.94e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1270 on 52 degrees of freedom
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 52 83935233 1614139
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(1,1,0)
##
## Coefficients:
## ar1
## 0.3295
## s.e. 0.1337
##
## sigma^2 estimated as 140228: log likelihood=-381.46
## AIC=766.93 AICc=767.17 BIC=770.83
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 17.41164 367.3372 247.9875 -15.6555 40.55201 0.8408558 -0.0331298
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,0)
## Q* = 10.186, df = 9, p-value = 0.3356
##
## Model df: 1. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_usa[54:58,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_usa$weekly_aggregated_residuals[1:58])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_usa[1:58,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "USA Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Null Model, ARIMA(1,1,0)")

plot(new_ts_covid_usa$week[54:58], # Draw first time series
new_ts_covid_usa$weekly_aggregated_residuals[54:58],
type = "l",
xlab = "Date",
ylab = "USA Prediction vs. Acutal: Null Model")
lines(new_ts_covid_usa$week[54:58], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE USA Null Method 3
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 134936.6
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 418526.4
regression_model = lm(weekly_aggregated_residuals~vaccination_policy, data = new_ts_covid_usa[1:53,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ vaccination_policy,
## data = new_ts_covid_usa[1:53, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1607.8 -947.5 -544.2 1020.5 2658.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1616.21 322.63 5.010 6.92e-06 ***
## vaccination_policy -31.78 11.11 -2.861 0.0061 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1191 on 51 degrees of freedom
## Multiple R-squared: 0.1383, Adjusted R-squared: 0.1214
## F-statistic: 8.188 on 1 and 51 DF, p-value: 0.006101
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## vaccination_policy 1 11611275 11611275 8.1878 0.006101 **
## Residuals 51 72323958 1418117
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(5,0,0) with zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5
## 1.0396 -0.0667 0.0437 0.2100 -0.4161
## s.e. 0.1295 0.2147 0.2321 0.2332 0.1417
##
## sigma^2 estimated as 105262: log likelihood=-380.99
## AIC=773.97 AICc=775.8 BIC=785.79
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -8.56323 308.7581 238.2628 0.6498531 37.30185 0.8149303
## ACF1
## Training set -0.02779966
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(5,0,0) with zero mean
## Q* = 1.6662, df = 5, p-value = 0.8931
##
## Model df: 5. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_usa[54:58,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_usa$weekly_aggregated_residuals[1:58])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_usa[1:58,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "USA Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Full Model, ARIMA(5,0,0)")

plot(new_ts_covid_usa$week[54:58], # Draw first time series
new_ts_covid_usa$weekly_aggregated_residuals[54:58],
type = "l",
xlab = "Date",
ylab = "USA Prediction vs. Acutal: Full Model")
lines(new_ts_covid_usa$week[54:58], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE USA Full Method3
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 95331.56
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 67947.85
#Canada Example on aggregate weekly
ts_covid_canada = ts(covid_canada$residuals,frequency = 7)
ts_covid_canada_2 = ts(colSums(matrix(ts_covid_canada, nrow=7)))
## Warning in matrix(ts_covid_canada, nrow = 7): data length [409] is not a sub-
## multiple or multiple of the number of rows [7]
vaccine_policy = colSums(matrix(as.numeric(covid_canada$vaccination_policy), nrow=7))
## Warning in matrix(as.numeric(covid_canada$vaccination_policy), nrow = 7): data
## length [409] is not a sub-multiple or multiple of the number of rows [7]
new_ts_covid_canada = data.frame(1:59,ts_covid_canada_2,vaccine_policy)
names(new_ts_covid_canada) = c("week","weekly_aggregated_residuals","vaccination_policy")
plot.ts(ts_covid_canada_2)

regression_model = lm(weekly_aggregated_residuals~1, data = new_ts_covid_canada[1:53,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ 1, data = new_ts_covid_canada[1:53,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -538.06 -319.27 -50.34 297.63 737.85
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -309.04 50.45 -6.126 1.22e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 367.3 on 52 degrees of freedom
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 52 7013442 134874
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(2,0,0) with zero mean
##
## Coefficients:
## ar1 ar2
## 1.5911 -0.6923
## s.e. 0.0937 0.0944
##
## sigma^2 estimated as 8017: log likelihood=-314.13
## AIC=634.26 AICc=634.75 BIC=640.17
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.56964 87.83277 58.27775 8.972917 36.87626 0.6045546 -0.06785543
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,0) with zero mean
## Q* = 5.3504, df = 8, p-value = 0.7195
##
## Model df: 2. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_canada[54:58,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_canada$weekly_aggregated_residuals[1:58])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_canada[1:58,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Canada Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Null Model, ARIMA(2,0,0)")

plot(new_ts_covid_canada$week[54:58], # Draw first time series
new_ts_covid_canada$weekly_aggregated_residuals[54:58],
type = "l",
xlab = "Date",
ylab = "Canada Prediction vs. Acutal: Null Model")
lines(new_ts_covid_canada$week[54:58], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Canada Null method 3
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 7714.595
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 16810.94
regression_model = lm(weekly_aggregated_residuals~vaccination_policy, data = new_ts_covid_canada[1:53,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ vaccination_policy,
## data = new_ts_covid_canada[1:53, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -615.71 -274.08 -40.79 235.97 747.40
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -110.149 112.006 -0.983 0.3300
## vaccination_policy -7.444 3.768 -1.976 0.0536 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 357.4 on 51 degrees of freedom
## Multiple R-squared: 0.07109, Adjusted R-squared: 0.05288
## F-statistic: 3.903 on 1 and 51 DF, p-value: 0.05362
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## vaccination_policy 1 498583 498583 3.903 0.05362 .
## Residuals 51 6514859 127742
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(2,0,1) with zero mean
##
## Coefficients:
## ar1 ar2 ma1
## 1.7818 -0.8713 -0.4328
## s.e. 0.0923 0.0883 0.1981
##
## sigma^2 estimated as 7966: log likelihood=-313.48
## AIC=634.97 AICc=635.8 BIC=642.85
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.3769429 86.69048 56.97042 231.7411 304.8481 0.5984639
## ACF1
## Training set 0.0006284805
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,1) with zero mean
## Q* = 2.4597, df = 7, p-value = 0.9301
##
## Model df: 3. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_canada[54:58,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_canada$weekly_aggregated_residuals[1:58])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_canada[1:58,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Canada Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Full Model, ARIMA(2,0,1)")

plot(new_ts_covid_canada$week[54:58], # Draw first time series
new_ts_covid_canada$weekly_aggregated_residuals[54:58],
type = "l",
xlab = "Date",
ylab = "Canada Prediction vs. Acutal: Full Model")
lines(new_ts_covid_canada$week[54:58], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Canada Full Method 3
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 7515.24
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 8541.354
#Cote Example on aggregate weekly
ts_covid_cote = ts(covid_cote$residuals,frequency = 7)
ts_covid_cote_2 = ts(colSums(matrix(ts_covid_cote, nrow=7)))
## Warning in matrix(ts_covid_cote, nrow = 7): data length [258] is not a sub-
## multiple or multiple of the number of rows [7]
vaccine_policy = colSums(matrix(as.numeric(covid_cote$vaccination_policy), nrow=7))
## Warning in matrix(as.numeric(covid_cote$vaccination_policy), nrow = 7): data
## length [258] is not a sub-multiple or multiple of the number of rows [7]
new_ts_covid_cote = data.frame(1:37,ts_covid_cote_2,vaccine_policy)
names(new_ts_covid_cote) = c("week","weekly_aggregated_residuals","vaccination_policy")
plot.ts(ts_covid_cote_2)

regression_model = lm(weekly_aggregated_residuals~1, data = new_ts_covid_cote[1:32,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ 1, data = new_ts_covid_cote[1:32,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.270 -21.875 -5.835 9.707 77.706
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -97.329 4.997 -19.48 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 28.27 on 31 degrees of freedom
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 31 24771 799.08
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(0,1,0)
##
## sigma^2 estimated as 403: log likelihood=-136.97
## AIC=275.94 AICc=276.08 BIC=277.38
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.07958716 19.75977 11.62652 22.98514 109.0915 0.9687882
## ACF1
## Training set -0.05123007
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,0)
## Q* = 8.6306, df = 6, p-value = 0.1954
##
## Model df: 0. Total lags used: 6
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_cote[33:37,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_cote$weekly_aggregated_residuals[1:37])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_cote[1:37,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Cote Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Null Model, ARIMA(0,1,0)")

plot(new_ts_covid_cote$week[33:37], # Draw first time series
new_ts_covid_cote$weekly_aggregated_residuals[33:37],
type = "l",
xlab = "Date",
ylab = "Cote Prediction vs. Acutal: Null Model")
lines(new_ts_covid_cote$week[33:37], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Cote Null
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 390.4484
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 849.6274
regression_model = lm(weekly_aggregated_residuals~vaccination_policy, data = new_ts_covid_cote[1:32,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ vaccination_policy,
## data = new_ts_covid_cote[1:32, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -34.866 -11.376 -8.633 9.711 65.812
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -62.7813 8.9283 -7.032 8.14e-08 ***
## vaccination_policy -1.1327 0.2619 -4.325 0.000155 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.55 on 30 degrees of freedom
## Multiple R-squared: 0.384, Adjusted R-squared: 0.3635
## F-statistic: 18.7 on 1 and 30 DF, p-value: 0.0001555
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## vaccination_policy 1 9512.3 9512.3 18.701 0.0001555 ***
## Residuals 30 15259.1 508.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(1,0,0) with zero mean
##
## Coefficients:
## ar1
## 0.5846
## s.e. 0.1421
##
## sigma^2 estimated as 322.1: log likelihood=-137.5
## AIC=279.01 AICc=279.42 BIC=281.94
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.5298636 17.66441 10.70876 70.46478 73.72894 0.9174553 0.05083025
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,0) with zero mean
## Q* = 5.675, df = 5, p-value = 0.3391
##
## Model df: 1. Total lags used: 6
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_cote[33:37,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_cote$weekly_aggregated_residuals[1:37])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_cote[1:37,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Cote Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Full Model, ARIMA(1,0,0)")

plot(new_ts_covid_cote$week[33:37], # Draw first time series
new_ts_covid_cote$weekly_aggregated_residuals[33:37],
type = "l",
xlab = "Date",
ylab = "Cote Prediction vs. Acutal: Full Model")
lines(new_ts_covid_cote$week[33:37], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Cote Full
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 312.0312
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 319.3056
#Ghana Example on aggregate weekly
ts_covid_ghana = ts(covid_ghana$residuals,frequency = 7)
ts_covid_ghana_2 = ts(colSums(matrix(ts_covid_ghana, nrow=7)))
## Warning in matrix(ts_covid_ghana, nrow = 7): data length [208] is not a sub-
## multiple or multiple of the number of rows [7]
vaccine_policy = colSums(matrix(as.numeric(covid_ghana$vaccination_policy), nrow=7))
## Warning in matrix(as.numeric(covid_ghana$vaccination_policy), nrow = 7): data
## length [208] is not a sub-multiple or multiple of the number of rows [7]
new_ts_covid_ghana = data.frame(1:30,ts_covid_ghana_2,vaccine_policy)
names(new_ts_covid_ghana) = c("week","weekly_aggregated_residuals","vaccination_policy")
plot.ts(ts_covid_ghana_2)

regression_model = lm(weekly_aggregated_residuals~1, data = new_ts_covid_ghana[1:25,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ 1, data = new_ts_covid_ghana[1:25,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -55.23 -38.87 -17.64 29.10 115.67
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -73.527 9.855 -7.461 1.06e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 49.27 on 24 degrees of freedom
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 24 58267 2427.8
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(1,0,0) with zero mean
##
## Coefficients:
## ar1
## 0.6933
## s.e. 0.1360
##
## sigma^2 estimated as 1209: log likelihood=-124.01
## AIC=252.02 AICc=252.56 BIC=254.45
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.162571 34.06465 26.80162 63.60824 92.10038 0.9803189 -0.05002308
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,0) with zero mean
## Q* = 4.0198, df = 4, p-value = 0.4033
##
## Model df: 1. Total lags used: 5
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_ghana[26:30,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_ghana$weekly_aggregated_residuals[1:30])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_ghana[1:30,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Ghana Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Null Model, ARIMA(1,0,0)")

plot(new_ts_covid_ghana$week[26:30], # Draw first time series
new_ts_covid_ghana$weekly_aggregated_residuals[26:30],
type = "l",
xlab = "Date",
ylab = "Ghana Prediction vs. Acutal: Null Model")
lines(new_ts_covid_ghana$week[26:30], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Ghana Null
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 1160.4
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 1152.268
regression_model = lm(weekly_aggregated_residuals~vaccination_policy, data = new_ts_covid_ghana[1:25,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ vaccination_policy,
## data = new_ts_covid_ghana[1:25, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -56.78 -37.01 -12.13 37.07 107.15
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -44.9807 19.6074 -2.294 0.0313 *
## vaccination_policy -1.6219 0.9743 -1.665 0.1095
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 47.55 on 23 degrees of freedom
## Multiple R-squared: 0.1075, Adjusted R-squared: 0.06874
## F-statistic: 2.772 on 1 and 23 DF, p-value: 0.1095
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## vaccination_policy 1 6266 6266.4 2.7716 0.1095
## Residuals 23 52001 2260.9
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(1,0,0) with zero mean
##
## Coefficients:
## ar1
## 0.668
## s.e. 0.148
##
## sigma^2 estimated as 1206: log likelihood=-123.95
## AIC=251.9 AICc=252.44 BIC=254.34
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.729828 34.02841 26.73081 248.3642 366.6443 0.9610988 -0.06799195
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,0) with zero mean
## Q* = 3.2926, df = 4, p-value = 0.5101
##
## Model df: 1. Total lags used: 5
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_ghana[26:30,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_ghana$weekly_aggregated_residuals[1:30])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_ghana[1:30,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Ghana Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Full Model, ARIMA(1,0,0)")

plot(new_ts_covid_ghana$week[26:30], # Draw first time series
new_ts_covid_ghana$weekly_aggregated_residuals[26:30],
type = "l",
xlab = "Date",
ylab = "Ghana Prediction vs. Acutal: Full Model")
lines(new_ts_covid_ghana$week[26:30], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Ghana Full
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 1157.933
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 745.748
#Guatemala Example on aggregate weekly
ts_covid_guatemala = ts(covid_guatemala$residuals,frequency = 7)
ts_covid_guatemala_2 = ts(colSums(matrix(ts_covid_guatemala, nrow=7)))
## Warning in matrix(ts_covid_guatemala, nrow = 7): data length [374] is not a sub-
## multiple or multiple of the number of rows [7]
vaccine_policy = colSums(matrix(as.numeric(covid_guatemala$vaccination_policy), nrow=7))
## Warning in matrix(as.numeric(covid_guatemala$vaccination_policy), nrow = 7):
## data length [374] is not a sub-multiple or multiple of the number of rows [7]
new_ts_covid_guatemala = data.frame(1:54,ts_covid_guatemala_2,vaccine_policy)
names(new_ts_covid_guatemala) = c("week","weekly_aggregated_residuals","vaccination_policy")
plot.ts(ts_covid_guatemala_2)

regression_model = lm(weekly_aggregated_residuals~1, data = new_ts_covid_guatemala[1:49,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ 1, data = new_ts_covid_guatemala[1:49,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -286.23 -257.64 -170.09 57.71 970.71
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -247.87 51.24 -4.837 1.4e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 358.7 on 48 degrees of freedom
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 48 6176129 128669
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## 0.3873
## s.e. 0.1313
##
## sigma^2 estimated as 5577: log likelihood=-274.72
## AIC=553.44 AICc=553.71 BIC=557.18
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 12.37604 73.14104 49.42821 -11.06218 57.80992 0.90031 -0.0148057
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)
## Q* = 4.5724, df = 9, p-value = 0.8699
##
## Model df: 1. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_guatemala[50:54,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_guatemala$weekly_aggregated_residuals[1:54])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_guatemala[1:54,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Guatemala Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Null Model, ARIMA(0,1,1)")

plot(new_ts_covid_guatemala$week[50:54], # Draw first time series
new_ts_covid_guatemala$weekly_aggregated_residuals[50:54],
type = "l",
xlab = "Date",
ylab = "Guatemala Prediction vs. Acutal: Null Model")
lines(new_ts_covid_guatemala$week[50:54], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Guaremala Null
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 5349.611
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 200715
regression_model = lm(weekly_aggregated_residuals~vaccination_policy, data = new_ts_covid_guatemala[1:49,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ vaccination_policy,
## data = new_ts_covid_guatemala[1:49, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -514.25 -47.85 40.39 65.28 524.37
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -753.607 58.086 -12.97 < 2e-16 ***
## vaccination_policy 27.202 2.702 10.07 2.59e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 204.1 on 47 degrees of freedom
## Multiple R-squared: 0.6831, Adjusted R-squared: 0.6764
## F-statistic: 101.3 on 1 and 47 DF, p-value: 2.589e-13
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## vaccination_policy 1 4219091 4219091 101.33 2.589e-13 ***
## Residuals 47 1957038 41639
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(2,0,0) with zero mean
##
## Coefficients:
## ar1 ar2
## 1.1469 -0.3687
## s.e. 0.1300 0.1310
##
## sigma^2 estimated as 10191: log likelihood=-295.37
## AIC=596.75 AICc=597.28 BIC=602.42
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.4284951 98.867 68.54279 -4.273555 118.4029 0.9044698
## ACF1
## Training set 0.0005101659
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,0) with zero mean
## Q* = 5.8776, df = 8, p-value = 0.6609
##
## Model df: 2. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_guatemala[50:54,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_guatemala$weekly_aggregated_residuals[1:54])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_guatemala[1:54,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Guatemala Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Full Model, ARIMA(2,0,0)")

plot(new_ts_covid_guatemala$week[50:54], # Draw first time series
new_ts_covid_guatemala$weekly_aggregated_residuals[50:54],
type = "l",
xlab = "Date",
ylab = "Guatemala Prediction vs. Acutal: Full Model")
lines(new_ts_covid_guatemala$week[50:54], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Guatemala Full
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 9774.683
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 56430.54
#India Example on aggregate weekly
ts_covid_india = ts(covid_india$residuals,frequency = 7)
ts_covid_india_2 = ts(colSums(matrix(ts_covid_india, nrow=7)))
## Warning in matrix(ts_covid_india, nrow = 7): data length [362] is not a sub-
## multiple or multiple of the number of rows [7]
vaccine_policy = colSums(matrix(as.numeric(covid_india$vaccination_policy), nrow=7))
## Warning in matrix(as.numeric(covid_india$vaccination_policy), nrow = 7): data
## length [362] is not a sub-multiple or multiple of the number of rows [7]
new_ts_covid_india = data.frame(1:52,ts_covid_india_2,vaccine_policy)
names(new_ts_covid_india) = c("week","weekly_aggregated_residuals","vaccination_policy")
plot.ts(ts_covid_india_2)

regression_model = lm(weekly_aggregated_residuals~1, data = new_ts_covid_india[1:47,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ 1, data = new_ts_covid_india[1:47,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -268.9 -245.8 -214.3 -101.3 1534.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 138.25 69.31 1.995 0.052 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 475.2 on 46 degrees of freedom
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 46 10387457 225814
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(2,0,0) with zero mean
##
## Coefficients:
## ar1 ar2
## 1.7204 -0.8305
## s.e. 0.0697 0.0692
##
## sigma^2 estimated as 6969: log likelihood=-275.87
## AIC=557.74 AICc=558.3 BIC=563.29
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.1572028 81.68453 48.6424 -58.948 82.23303 0.5681809 0.2240344
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,0) with zero mean
## Q* = 14.925, df = 7, p-value = 0.03697
##
## Model df: 2. Total lags used: 9
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_india[48:52,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_india$weekly_aggregated_residuals[1:52])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_india[1:52,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "India Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Null Model, ARIMA(2,0,0)")

plot(new_ts_covid_india$week[48:52], # Draw first time series
new_ts_covid_india$weekly_aggregated_residuals[48:52],
type = "l",
xlab = "Date",
ylab = "India Prediction vs. Acutal: Null Model")
lines(new_ts_covid_india$week[48:52], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE India Null
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 6672.362
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 34539.97
regression_model = lm(weekly_aggregated_residuals~vaccination_policy, data = new_ts_covid_india[1:47,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ vaccination_policy,
## data = new_ts_covid_india[1:47, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -457.7 -322.2 -128.8 89.6 1414.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -175.800 153.607 -1.144 0.2585
## vaccination_policy 12.383 5.462 2.267 0.0282 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 455.2 on 45 degrees of freedom
## Multiple R-squared: 0.1025, Adjusted R-squared: 0.08257
## F-statistic: 5.14 on 1 and 45 DF, p-value: 0.02823
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## vaccination_policy 1 1064901 1064901 5.1403 0.02823 *
## Residuals 45 9322556 207168
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(2,0,2) with zero mean
##
## Coefficients:
## ar1 ar2 ma1 ma2
## 1.5202 -0.6679 0.3237 0.3316
## s.e. 0.1507 0.1446 0.2118 0.1545
##
## sigma^2 estimated as 7498: log likelihood=-276.62
## AIC=563.23 AICc=564.7 BIC=572.48
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.9512028 82.82397 58.35076 10.92665 37.01834 0.6452885
## ACF1
## Training set 0.02990413
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,2) with zero mean
## Q* = 4.4603, df = 5, p-value = 0.4852
##
## Model df: 4. Total lags used: 9
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_india[48:52,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_india$weekly_aggregated_residuals[1:52])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_india[1:52,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "India Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Full Model, ARIMA(2,0,2)")

plot(new_ts_covid_india$week[48:52], # Draw first time series
new_ts_covid_india$weekly_aggregated_residuals[48:52],
type = "l",
xlab = "Date",
ylab = "India Prediction vs. Acutal: Full Model")
lines(new_ts_covid_india$week[48:52], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE India Full
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 6859.81
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 99543.38
#Indonesia Example on aggregate weekly
ts_covid_indonesia = ts(covid_indonesia$residuals,frequency = 7)
ts_covid_indonesia_2 = ts(colSums(matrix(ts_covid_indonesia, nrow=7)))
## Warning in matrix(ts_covid_indonesia, nrow = 7): data length [219] is not a sub-
## multiple or multiple of the number of rows [7]
vaccine_policy = colSums(matrix(as.numeric(covid_indonesia$vaccination_policy), nrow=7))
## Warning in matrix(as.numeric(covid_indonesia$vaccination_policy), nrow = 7):
## data length [219] is not a sub-multiple or multiple of the number of rows [7]
new_ts_covid_indonesia = data.frame(1:32,ts_covid_indonesia_2,vaccine_policy)
names(new_ts_covid_indonesia) = c("week","weekly_aggregated_residuals","vaccination_policy")
plot.ts(ts_covid_indonesia_2)

regression_model = lm(weekly_aggregated_residuals~1, data = new_ts_covid_indonesia[1:27,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ 1, data = new_ts_covid_indonesia[1:27,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -102.43 -65.99 -52.22 39.94 390.97
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 61.55 20.92 2.942 0.00677 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 108.7 on 26 degrees of freedom
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 26 307203 11816
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(1,0,2) with non-zero mean
##
## Coefficients:
## ar1 ma1 ma2 mean
## 0.8774 0.8697 0.9756 75.1264
## s.e. 0.1239 0.4897 1.0765 134.9435
##
## sigma^2 estimated as 1480: log likelihood=-138.79
## AIC=287.59 AICc=290.45 BIC=294.07
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 3.874938 35.50205 25.14409 22.28285 58.36999 0.6683383 0.1219358
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,2) with non-zero mean
## Q* = 1.8983, df = 3, p-value = 0.5938
##
## Model df: 4. Total lags used: 7
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_indonesia[28:32,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_indonesia$weekly_aggregated_residuals[1:32])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_indonesia[1:32,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Indonesia Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Null Model, ARIMA(2,0,2)")

plot(new_ts_covid_indonesia$week[28:32], # Draw first time series
new_ts_covid_indonesia$weekly_aggregated_residuals[28:32],
type = "l",
xlab = "Date",
ylab = "Indonesia Prediction vs. Acutal: Null Model")
lines(new_ts_covid_indonesia$week[28:32], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Indonesia Null
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 1260.395
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 129437.1
regression_model = lm(weekly_aggregated_residuals~vaccination_policy, data = new_ts_covid_indonesia[1:27,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ vaccination_policy,
## data = new_ts_covid_indonesia[1:27, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -113.82 -75.13 -39.24 38.97 370.13
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -16.050 56.956 -0.282 0.780
## vaccination_policy 4.687 3.210 1.460 0.157
##
## Residual standard error: 106.4 on 25 degrees of freedom
## Multiple R-squared: 0.07858, Adjusted R-squared: 0.04172
## F-statistic: 2.132 on 1 and 25 DF, p-value: 0.1567
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## vaccination_policy 1 24140 24140 2.132 0.1567
## Residuals 25 283063 11322
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(0,0,2) with zero mean
##
## Coefficients:
## ma1 ma2
## 1.1193 0.9546
## s.e. 0.2620 0.4744
##
## sigma^2 estimated as 2936: log likelihood=-147.39
## AIC=300.78 AICc=301.82 BIC=304.66
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 5.115449 52.14034 36.28781 25.01186 61.62268 0.995775 0.361771
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,0,2) with zero mean
## Q* = 6.776, df = 3, p-value = 0.07939
##
## Model df: 2. Total lags used: 5
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_indonesia[28:32,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_indonesia$weekly_aggregated_residuals[1:32])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_indonesia[1:32,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Indonesia Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Full Model, ARIMA(2,0,1)")

plot(new_ts_covid_indonesia$week[28:32], # Draw first time series
new_ts_covid_indonesia$weekly_aggregated_residuals[28:32],
type = "l",
xlab = "Date",
ylab = "Indonesia Prediction vs. Acutal: Full Model")
lines(new_ts_covid_indonesia$week[28:32], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Indonesia Full
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 2718.615
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 223047.2
#Italy Example on aggregate weekly
ts_covid_italy = ts(covid_italy$residuals,frequency = 7)
ts_covid_italy_2 = ts(colSums(matrix(ts_covid_italy, nrow=7)))
## Warning in matrix(ts_covid_italy, nrow = 7): data length [395] is not a sub-
## multiple or multiple of the number of rows [7]
vaccine_policy = colSums(matrix(as.numeric(covid_italy$vaccination_policy), nrow=7))
## Warning in matrix(as.numeric(covid_italy$vaccination_policy), nrow = 7): data
## length [395] is not a sub-multiple or multiple of the number of rows [7]
new_ts_covid_italy = data.frame(1:57,ts_covid_italy_2,vaccine_policy)
names(new_ts_covid_italy) = c("week","weekly_aggregated_residuals","vaccination_policy")
plot.ts(ts_covid_italy_2)

regression_model = lm(weekly_aggregated_residuals~1, data = new_ts_covid_italy[1:52,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ 1, data = new_ts_covid_italy[1:52,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1181.19 -838.82 -59.01 633.30 2672.01
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 67.37 138.80 0.485 0.629
##
## Residual standard error: 1001 on 51 degrees of freedom
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 51 51089085 1001747
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(2,1,1)
##
## Coefficients:
## ar1 ar2 ma1
## 1.5750 -0.7912 -0.7692
## s.e. 0.1159 0.0932 0.1590
##
## sigma^2 estimated as 42989: log likelihood=-343.68
## AIC=695.37 AICc=696.24 BIC=703.1
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -13.91981 199.2044 139.7122 -15.69634 84.642 0.6064991 0.05854227
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,1)
## Q* = 6.9893, df = 7, p-value = 0.43
##
## Model df: 3. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_italy[53:57,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_italy$weekly_aggregated_residuals[1:57])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_italy[1:57,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Italy Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Null Model, ARIMA(2,1,1)")

plot(new_ts_covid_italy$week[53:57], # Draw first time series
new_ts_covid_italy$weekly_aggregated_residuals[53:57],
type = "l",
xlab = "Date",
ylab = "Italy Prediction vs. Acutal: Null Model")
lines(new_ts_covid_italy$week[53:57], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Italy Null
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 39682.38
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 98947.38
regression_model = lm(weekly_aggregated_residuals~vaccination_policy, data = new_ts_covid_italy[1:52,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ vaccination_policy,
## data = new_ts_covid_italy[1:52, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1901.06 -331.13 -38.52 350.53 1605.76
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1540.025 195.017 7.897 2.4e-10 ***
## vaccination_policy -58.057 6.828 -8.503 2.8e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 646.3 on 50 degrees of freedom
## Multiple R-squared: 0.5912, Adjusted R-squared: 0.583
## F-statistic: 72.29 on 1 and 50 DF, p-value: 2.805e-11
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## vaccination_policy 1 30201415 30201415 72.295 2.805e-11 ***
## Residuals 50 20887669 417753
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(2,0,0) with zero mean
##
## Coefficients:
## ar1 ar2
## 1.5292 -0.8207
## s.e. 0.0813 0.0850
##
## sigma^2 estimated as 47627: log likelihood=-354.55
## AIC=715.09 AICc=715.59 BIC=720.94
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.2823539 213.9979 154.6673 -537.84 698.1204 0.6331228
## ACF1
## Training set -0.08598986
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,0) with zero mean
## Q* = 5.0949, df = 8, p-value = 0.7474
##
## Model df: 2. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_italy[53:57,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_italy$weekly_aggregated_residuals[1:57])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_italy[1:57,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Italy Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Full Model, ARIMA(2,0,0)")

plot(new_ts_covid_italy$week[53:57], # Draw first time series
new_ts_covid_italy$weekly_aggregated_residuals[53:57],
type = "l",
xlab = "Date",
ylab = "Italy Prediction vs. Acutal: Full Model")
lines(new_ts_covid_italy$week[53:57], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Italy Full
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 45795.1
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 493516.2
#Japan Example on aggregate weekly
ts_covid_japan = ts(covid_japan$residuals,frequency = 7)
ts_covid_japan_2 = ts(colSums(matrix(ts_covid_japan, nrow=7)))
vaccine_policy = colSums(matrix(as.numeric(covid_japan$vaccination_policy), nrow=7))
new_ts_covid_japan = data.frame(1:50,ts_covid_japan_2,vaccine_policy)
names(new_ts_covid_japan) = c("week","weekly_aggregated_residuals","vaccination_policy")
plot.ts(ts_covid_japan_2)

regression_model = lm(weekly_aggregated_residuals~1, data = new_ts_covid_japan[1:45,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ 1, data = new_ts_covid_japan[1:45,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -206.53 -158.08 -98.11 7.20 894.92
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 96.09 40.11 2.396 0.0209 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 269 on 44 degrees of freedom
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 44 3184844 72383
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(2,1,1)
##
## Coefficients:
## ar1 ar2 ma1
## 1.5856 -0.8366 -0.7681
## s.e. 0.1005 0.0792 0.1809
##
## sigma^2 estimated as 4095: log likelihood=-244.91
## AIC=497.82 AICc=498.84 BIC=504.95
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 8.743547 61.07848 36.10528 160.3919 226.4182 0.5201579 0.09833099
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,1,1)
## Q* = 3.7162, df = 6, p-value = 0.715
##
## Model df: 3. Total lags used: 9
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_japan[46:50,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_japan$weekly_aggregated_residuals[1:50])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_japan[1:50,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Japan Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Null Model, ARIMA(2,1,1)")

plot(new_ts_covid_japan$week[46:50], # Draw first time series
new_ts_covid_japan$weekly_aggregated_residuals[46:50],
type = "l",
xlab = "Date",
ylab = "Japan Prediction vs. Acutal: Null Model")
lines(new_ts_covid_japan$week[46:50], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Japan Null
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 3730.581
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 147076.2
regression_model = lm(weekly_aggregated_residuals~vaccination_policy, data = new_ts_covid_japan[1:45,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ vaccination_policy,
## data = new_ts_covid_japan[1:45, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -362.40 -125.99 -29.33 57.61 738.43
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -121.423 72.188 -1.682 0.09981 .
## vaccination_policy 10.686 3.078 3.472 0.00119 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 240.5 on 43 degrees of freedom
## Multiple R-squared: 0.2189, Adjusted R-squared: 0.2008
## F-statistic: 12.05 on 1 and 43 DF, p-value: 0.001191
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## vaccination_policy 1 697232 697232 12.052 0.001191 **
## Residuals 43 2487612 57851
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(2,0,0) with zero mean
##
## Coefficients:
## ar1 ar2
## 1.6107 -0.8406
## s.e. 0.0741 0.0727
##
## sigma^2 estimated as 3944: log likelihood=-251.08
## AIC=508.16 AICc=508.74 BIC=513.58
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -1.962991 61.3877 39.73173 28.29837 45.90808 0.527788 0.09894474
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,0) with zero mean
## Q* = 4.3985, df = 7, p-value = 0.7329
##
## Model df: 2. Total lags used: 9
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_japan[46:50,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_japan$weekly_aggregated_residuals[1:50])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_japan[1:50,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Japan Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Full Model, ARIMA(2,0,0)")

plot(new_ts_covid_japan$week[46:50], # Draw first time series
new_ts_covid_japan$weekly_aggregated_residuals[46:50],
type = "l",
xlab = "Date",
ylab = "Japan Prediction vs. Acutal: Full Model")
lines(new_ts_covid_japan$week[46:50], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Japan Full
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 3768.449
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 113560.9
#Mexico Example on aggregate weekly
ts_covid_mexico = ts(covid_mexico$residuals,frequency = 7)
ts_covid_mexico_2 = ts(colSums(matrix(ts_covid_mexico, nrow=7)))
## Warning in matrix(ts_covid_mexico, nrow = 7): data length [401] is not a sub-
## multiple or multiple of the number of rows [7]
vaccine_policy = colSums(matrix(as.numeric(covid_mexico$vaccination_policy), nrow=7))
## Warning in matrix(as.numeric(covid_mexico$vaccination_policy), nrow = 7): data
## length [401] is not a sub-multiple or multiple of the number of rows [7]
new_ts_covid_mexico = data.frame(1:58,ts_covid_mexico_2,vaccine_policy)
names(new_ts_covid_mexico) = c("week","weekly_aggregated_residuals","vaccination_policy")
plot.ts(ts_covid_mexico_2)

regression_model = lm(weekly_aggregated_residuals~1, data = new_ts_covid_mexico[1:53,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ 1, data = new_ts_covid_mexico[1:53,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -300.90 -191.13 -68.37 126.46 545.70
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -274.58 32.93 -8.338 3.73e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 239.7 on 52 degrees of freedom
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 52 2988441 57470
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(1,0,0) with zero mean
##
## Coefficients:
## ar1
## 0.8794
## s.e. 0.0589
##
## sigma^2 estimated as 11890: log likelihood=-324.1
## AIC=652.21 AICc=652.45 BIC=656.15
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.576358 108.0099 79.89582 86.89919 149.0427 1.004145 0.06268612
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,0) with zero mean
## Q* = 9.3113, df = 9, p-value = 0.4091
##
## Model df: 1. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_mexico[54:58,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_mexico$weekly_aggregated_residuals[1:58])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_mexico[1:58,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Mexico Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Null Model, ARIMA(1,0,0)")

plot(new_ts_covid_mexico$week[54:58], # Draw first time series
new_ts_covid_mexico$weekly_aggregated_residuals[54:58],
type = "l",
xlab = "Date",
ylab = "Mexico Prediction vs. Acutal: Null Model")
lines(new_ts_covid_mexico$week[54:58], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Mexico Null
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 11666.14
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 29676.86
regression_model = lm(weekly_aggregated_residuals~vaccination_policy, data = new_ts_covid_mexico[1:53,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ vaccination_policy,
## data = new_ts_covid_mexico[1:53, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -331.85 -183.81 -38.24 165.89 545.70
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -396.699 66.218 -5.991 2.12e-07 ***
## vaccination_policy 5.467 2.598 2.104 0.0403 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 232.2 on 51 degrees of freedom
## Multiple R-squared: 0.0799, Adjusted R-squared: 0.06185
## F-statistic: 4.428 on 1 and 51 DF, p-value: 0.04029
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## vaccination_policy 1 238762 238762 4.4285 0.04029 *
## Residuals 51 2749679 53915
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(1,0,0) with zero mean
##
## Coefficients:
## ar1
## 0.8691
## s.e. 0.0615
##
## sigma^2 estimated as 11834: log likelihood=-323.94
## AIC=651.88 AICc=652.12 BIC=655.82
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -1.042131 107.7525 80.94973 74.20563 236.201 1.010714 0.05946579
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,0) with zero mean
## Q* = 7.9807, df = 9, p-value = 0.5361
##
## Model df: 1. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_mexico[54:58,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_mexico$weekly_aggregated_residuals[1:58])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_mexico[1:58,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Mexico Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Full Model, ARIMA(1,0,0)")

plot(new_ts_covid_mexico$week[54:58], # Draw first time series
new_ts_covid_mexico$weekly_aggregated_residuals[54:58],
type = "l",
xlab = "Date",
ylab = "Mexico Prediction vs. Acutal: Full Model")
lines(new_ts_covid_mexico$week[54:58], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Mexico Full
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 11610.6
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 34994.13
#Morocco Example on aggregate weekly
ts_covid_morocco = ts(covid_morocco$residuals,frequency = 7)
ts_covid_morocco_2 = ts(colSums(matrix(ts_covid_morocco, nrow=7)))
## Warning in matrix(ts_covid_morocco, nrow = 7): data length [337] is not a sub-
## multiple or multiple of the number of rows [7]
vaccine_policy = colSums(matrix(as.numeric(covid_morocco$vaccination_policy), nrow=7))
## Warning in matrix(as.numeric(covid_morocco$vaccination_policy), nrow = 7): data
## length [337] is not a sub-multiple or multiple of the number of rows [7]
new_ts_covid_morocco = data.frame(1:49,ts_covid_morocco_2,vaccine_policy)
names(new_ts_covid_morocco) = c("week","weekly_aggregated_residuals","vaccination_policy")
plot.ts(ts_covid_morocco_2)

regression_model = lm(weekly_aggregated_residuals~1, data = new_ts_covid_morocco[1:44,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ 1, data = new_ts_covid_morocco[1:44,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -333.5 -305.8 -225.6 196.7 1303.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 225.32 65.22 3.455 0.00125 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 432.6 on 43 degrees of freedom
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 43 8048484 187174
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(1,0,2) with zero mean
##
## Coefficients:
## ar1 ma1 ma2
## 0.7349 1.0149 0.5319
## s.e. 0.1147 0.1967 0.1703
##
## sigma^2 estimated as 15781: log likelihood=-275.26
## AIC=558.53 AICc=559.55 BIC=565.66
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -1.740112 121.2659 62.09287 26.98198 47.22166 0.6566894
## ACF1
## Training set -0.02084803
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,2) with zero mean
## Q* = 4.1937, df = 6, p-value = 0.6505
##
## Model df: 3. Total lags used: 9
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_morocco[45:49,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_morocco$weekly_aggregated_residuals[1:49])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_morocco[1:49,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Morocco Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Null Model, ARIMA(1,0,2)")

plot(new_ts_covid_morocco$week[45:49], # Draw first time series
new_ts_covid_morocco$weekly_aggregated_residuals[45:49],
type = "l",
xlab = "Date",
ylab = "Morocco Prediction vs. Acutal: Null Model")
lines(new_ts_covid_morocco$week[45:49], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Morocco Null
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 14705.42
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 64940.89
regression_model = lm(weekly_aggregated_residuals~vaccination_policy, data = new_ts_covid_morocco[1:44,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ vaccination_policy,
## data = new_ts_covid_morocco[1:44, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -343.2 -306.9 -221.8 187.1 1294.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 200.738 155.054 1.295 0.203
## vaccination_policy 1.224 6.984 0.175 0.862
##
## Residual standard error: 437.6 on 42 degrees of freedom
## Multiple R-squared: 0.0007302, Adjusted R-squared: -0.02306
## F-statistic: 0.03069 on 1 and 42 DF, p-value: 0.8618
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## vaccination_policy 1 5877 5877 0.0307 0.8618
## Residuals 42 8042607 191491
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(1,0,2) with zero mean
##
## Coefficients:
## ar1 ma1 ma2
## 0.7345 1.0162 0.5379
## s.e. 0.1140 0.1919 0.1677
##
## sigma^2 estimated as 15727: log likelihood=-275.2
## AIC=558.39 AICc=559.42 BIC=565.53
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -1.961342 121.0585 62.39035 -1.124799 42.34422 0.6570673
## ACF1
## Training set -0.01884444
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,2) with zero mean
## Q* = 3.9824, df = 6, p-value = 0.6791
##
## Model df: 3. Total lags used: 9
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_morocco[45:49,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_morocco$weekly_aggregated_residuals[1:49])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_morocco[1:49,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Morocco Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Full Model, ARIMA(1,0,2)")

plot(new_ts_covid_morocco$week[45:49], # Draw first time series
new_ts_covid_morocco$weekly_aggregated_residuals[45:49],
type = "l",
xlab = "Date",
ylab = "Morocco Prediction vs. Acutal: Full Model")
lines(new_ts_covid_morocco$week[45:49], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Morocco Full
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 14655.16
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 69560.69
#Portugal Example on aggregate weekly
ts_covid_portugal = ts(covid_portugal$residuals,frequency = 7)
ts_covid_portugal_2 = ts(colSums(matrix(ts_covid_portugal, nrow=7)))
## Warning in matrix(ts_covid_portugal, nrow = 7): data length [397] is not a sub-
## multiple or multiple of the number of rows [7]
vaccine_policy = colSums(matrix(as.numeric(covid_portugal$vaccination_policy), nrow=7))
## Warning in matrix(as.numeric(covid_portugal$vaccination_policy), nrow = 7): data
## length [397] is not a sub-multiple or multiple of the number of rows [7]
new_ts_covid_portugal = data.frame(1:57,ts_covid_portugal_2,vaccine_policy)
names(new_ts_covid_portugal) = c("week","weekly_aggregated_residuals","vaccination_policy")
plot.ts(ts_covid_portugal_2)

regression_model = lm(weekly_aggregated_residuals~1, data = new_ts_covid_portugal[1:52,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ 1, data = new_ts_covid_portugal[1:52,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1962.3 -1357.8 -502.9 735.9 6327.9
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 484.8 264.3 1.835 0.0724 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1906 on 51 degrees of freedom
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 51 185211019 3631589
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## 0.6961
## s.e. 0.1046
##
## sigma^2 estimated as 438577: log likelihood=-403.47
## AIC=810.94 AICc=811.19 BIC=814.8
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -5.741977 649.3911 408.5719 -28.33632 84.46924 0.858771 0.05847121
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)
## Q* = 11.852, df = 9, p-value = 0.2218
##
## Model df: 1. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_portugal[53:57,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_portugal$weekly_aggregated_residuals[1:57])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_portugal[1:57,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Portugal Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Null Model, ARIMA(0,1,1)")

plot(new_ts_covid_portugal$week[53:57], # Draw first time series
new_ts_covid_portugal$weekly_aggregated_residuals[53:57],
type = "l",
xlab = "Date",
ylab = "Portugal Prediction vs. Acutal: Null Model")
lines(new_ts_covid_portugal$week[53:57], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Portugual Null
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 421708.9
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 31015.46
regression_model = lm(weekly_aggregated_residuals~vaccination_policy, data = new_ts_covid_portugal[1:52,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ vaccination_policy,
## data = new_ts_covid_portugal[1:52, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -2384.6 -949.8 -431.6 416.2 5975.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2339.15 557.95 4.192 0.000112 ***
## vaccination_policy -71.53 19.49 -3.671 0.000588 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1708 on 50 degrees of freedom
## Multiple R-squared: 0.2123, Adjusted R-squared: 0.1965
## F-statistic: 13.47 on 1 and 50 DF, p-value: 0.0005879
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## vaccination_policy 1 39315862 39315862 13.474 0.0005879 ***
## Residuals 50 145895157 2917903
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(2,0,0) with zero mean
##
## Coefficients:
## ar1 ar2
## 1.4329 -0.6293
## s.e. 0.1035 0.1043
##
## sigma^2 estimated as 389096: log likelihood=-408.67
## AIC=823.35 AICc=823.85 BIC=829.2
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.549144 611.6622 364.5818 -12.97327 64.07895 0.742551 0.02914117
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,0) with zero mean
## Q* = 13.85, df = 8, p-value = 0.08577
##
## Model df: 2. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_portugal[53:57,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_portugal$weekly_aggregated_residuals[1:57])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_portugal[1:57,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Portugal Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Full Model, ARIMA(2,0,0)")

plot(new_ts_covid_portugal$week[53:57], # Draw first time series
new_ts_covid_portugal$weekly_aggregated_residuals[53:57],
type = "l",
xlab = "Date",
ylab = "Portugal Prediction vs. Acutal: Full Model")
lines(new_ts_covid_portugal$week[53:57], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Portugual Full
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 374130.7
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 142490.8
#Russia Example on aggregate weekly
ts_covid_russia = ts(covid_russia$residuals,frequency = 7)
ts_covid_russia_2 = ts(colSums(matrix(ts_covid_russia, nrow=7)))
## Warning in matrix(ts_covid_russia, nrow = 7): data length [248] is not a sub-
## multiple or multiple of the number of rows [7]
vaccine_policy = colSums(matrix(as.numeric(covid_russia$vaccination_policy), nrow=7))
## Warning in matrix(as.numeric(covid_russia$vaccination_policy), nrow = 7): data
## length [248] is not a sub-multiple or multiple of the number of rows [7]
new_ts_covid_russia = data.frame(1:36,ts_covid_russia_2,vaccine_policy)
names(new_ts_covid_russia) = c("week","weekly_aggregated_residuals","vaccination_policy")
plot.ts(ts_covid_russia_2)

regression_model = lm(weekly_aggregated_residuals~1, data = new_ts_covid_russia[1:31,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ 1, data = new_ts_covid_russia[1:31,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -298.18 -252.58 -98.95 229.25 536.28
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -394.32 50.45 -7.816 1.01e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 280.9 on 30 degrees of freedom
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 30 2367105 78904
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(1,1,0)
##
## Coefficients:
## ar1
## 0.6921
## s.e. 0.1436
##
## sigma^2 estimated as 3745: log likelihood=-165.81
## AIC=335.62 AICc=336.06 BIC=338.42
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 4.446286 59.19288 46.38008 -60.96461 81.01134 0.7137882 -0.1062188
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,1,0)
## Q* = 7.5889, df = 5, p-value = 0.1804
##
## Model df: 1. Total lags used: 6
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_russia[32:36,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_portugal$weekly_aggregated_residuals[1:36])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_russia[1:36,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Russia Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Null Model, ARIMA(1,1,0)")

plot(new_ts_covid_russia$week[32:36], # Draw first time series
new_ts_covid_russia$weekly_aggregated_residuals[32:36],
type = "l",
xlab = "Date",
ylab = "Russia Prediction vs. Acutal: Null Model")
lines(new_ts_covid_russia$week[32:36], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Russia Null
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 6554158
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 676377
regression_model = lm(weekly_aggregated_residuals~vaccination_policy, data = new_ts_covid_russia[1:31,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ vaccination_policy,
## data = new_ts_covid_russia[1:31, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -441.83 -86.89 -27.87 167.53 318.34
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -85.952 67.210 -1.279 0.211
## vaccination_policy -12.918 2.375 -5.439 7.48e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 201 on 29 degrees of freedom
## Multiple R-squared: 0.505, Adjusted R-squared: 0.4879
## F-statistic: 29.59 on 1 and 29 DF, p-value: 7.483e-06
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## vaccination_policy 1 1195422 1195422 29.587 7.483e-06 ***
## Residuals 29 1171684 40403
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(2,0,1) with zero mean
##
## Coefficients:
## ar1 ar2 ma1
## 1.8773 -0.9499 -0.8101
## s.e. 0.0532 0.0475 0.1599
##
## sigma^2 estimated as 4223: log likelihood=-173.7
## AIC=355.39 AICc=356.93 BIC=361.13
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 4.363854 61.75662 48.8188 -13.63794 69.21278 0.7729344 -0.14973
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,1) with zero mean
## Q* = 5.7662, df = 3, p-value = 0.1236
##
## Model df: 3. Total lags used: 6
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_russia[32:36,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_portugal$weekly_aggregated_residuals[1:36])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_russia[1:36,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Russia Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Full Model, ARIMA(2,0,1)")

plot(new_ts_covid_russia$week[32:36], # Draw first time series
new_ts_covid_russia$weekly_aggregated_residuals[32:36],
type = "l",
xlab = "Date",
ylab = "Russia Prediction vs. Acutal: Full Model")
lines(new_ts_covid_russia$week[32:36], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Russia Full
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 6568545
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 436139.8
#Saudi Example on aggregate weekly
ts_covid_saudi = ts(covid_saudi$residuals,frequency = 7)
ts_covid_saudi_2 = ts(colSums(matrix(ts_covid_saudi, nrow=7)))
## Warning in matrix(ts_covid_saudi, nrow = 7): data length [403] is not a sub-
## multiple or multiple of the number of rows [7]
vaccine_policy = colSums(matrix(as.numeric(covid_saudi$vaccination_policy), nrow=7))
## Warning in matrix(as.numeric(covid_saudi$vaccination_policy), nrow = 7): data
## length [403] is not a sub-multiple or multiple of the number of rows [7]
new_ts_covid_saudi = data.frame(1:58,ts_covid_saudi_2,vaccine_policy)
names(new_ts_covid_saudi) = c("week","weekly_aggregated_residuals","vaccination_policy")
plot.ts(ts_covid_saudi_2)

regression_model = lm(weekly_aggregated_residuals~1, data = new_ts_covid_saudi[1:53,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ 1, data = new_ts_covid_saudi[1:53,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -166.55 -21.28 2.99 22.05 90.64
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -233.321 6.229 -37.46 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 45.35 on 52 degrees of freedom
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 52 106929 2056.3
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(0,1,3)
##
## Coefficients:
## ma1 ma2 ma3
## -0.2827 -0.4785 0.2656
## s.e. 0.1348 0.1521 0.1309
##
## sigma^2 estimated as 1286: log likelihood=-258.75
## AIC=525.5 AICc=526.35 BIC=533.3
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -3.336452 34.4829 25.19053 -21.89949 189.2627 0.9423321
## ACF1
## Training set -0.03222228
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,3)
## Q* = 9.723, df = 7, p-value = 0.2048
##
## Model df: 3. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_saudi[54:58,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_saudi$weekly_aggregated_residuals[1:58])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_saudi[1:58,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Saudi Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Null Model, ARIMA(0,1,3)")

plot(new_ts_covid_saudi$week[54:58], # Draw first time series
new_ts_covid_saudi$weekly_aggregated_residuals[54:58],
type = "l",
xlab = "Date",
ylab = "Saudi Prediction vs. Acutal: Null Model")
lines(new_ts_covid_saudi$week[54:58], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Saudi Null
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 1189.071
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 788.4918
regression_model = lm(weekly_aggregated_residuals~vaccination_policy, data = new_ts_covid_saudi[1:53,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ vaccination_policy,
## data = new_ts_covid_saudi[1:53, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -164.978 -23.462 2.543 23.060 92.212
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -228.7081 15.1013 -15.145 <2e-16 ***
## vaccination_policy -0.1472 0.4382 -0.336 0.738
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 45.74 on 51 degrees of freedom
## Multiple R-squared: 0.002207, Adjusted R-squared: -0.01736
## F-statistic: 0.1128 on 1 and 51 DF, p-value: 0.7383
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## vaccination_policy 1 236 236.05 0.1128 0.7383
## Residuals 51 106693 2092.03
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(0,0,1) with zero mean
##
## Coefficients:
## ma1
## 0.6956
## s.e. 0.1116
##
## sigma^2 estimated as 1340: log likelihood=-265.83
## AIC=535.67 AICc=535.91 BIC=539.61
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.3146459 36.25454 25.51332 42.58368 210.6015 0.9520885
## ACF1
## Training set 0.03685257
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,0,1) with zero mean
## Q* = 15.487, df = 9, p-value = 0.0784
##
## Model df: 1. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_saudi[54:58,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_saudi$weekly_aggregated_residuals[1:58])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_saudi[1:58,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Saudi Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Full Model, ARIMA(0,0,1)")

plot(new_ts_covid_saudi$week[54:58], # Draw first time series
new_ts_covid_saudi$weekly_aggregated_residuals[54:58],
type = "l",
xlab = "Date",
ylab = "Saudi Prediction vs. Acutal: Full Model")
lines(new_ts_covid_saudi$week[54:58], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE Saudi Full
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 1314.392
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 2212.317
#Uganda Example on aggregate weekly
covid_uganda <- readRDS("data/covid_north_weekly.RData")
covid_uganda <-
covid_uganda %>% filter(location == "Uganda") %>%
mutate("week" = 1:43)
regression_model = lm(adjusted_weekly_cases_per_million~1, data = covid_uganda[1:38,])
summary(regression_model)
##
## Call:
## lm(formula = adjusted_weekly_cases_per_million ~ 1, data = covid_uganda[1:38,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.42 -34.87 -17.96 13.24 150.40
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -86.698 7.691 -11.27 1.59e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 47.41 on 37 degrees of freedom
anova(regression_model)
## Analysis of Variance Table
##
## Response: adjusted_weekly_cases_per_million
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 37 83172 2247.9
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(2,0,0) with zero mean
##
## Coefficients:
## ar1 ar2
## 1.2996 -0.5073
## s.e. 0.1343 0.1335
##
## sigma^2 estimated as 412.8: log likelihood=-168.3
## AIC=342.61 AICc=343.32 BIC=347.52
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1984356 19.77471 12.94145 -14.11046 79.2795 0.8859306
## ACF1
## Training set 0.008854984
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,0) with zero mean
## Q* = 1.9946, df = 6, p-value = 0.9202
##
## Model df: 2. Total lags used: 8
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = covid_uganda[39:43,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),covid_uganda$adjusted_weekly_cases_per_million[1:43])
colnames(compare_table) = c("predict","actual")
ggplot(data = covid_uganda[1:43,]) +
geom_line(aes(x=week, y=adjusted_weekly_cases_per_million, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Uganda Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Null Model, ARIMA(2,0,0)")

plot(covid_uganda$week[39:43], # Draw first time series
covid_uganda$adjusted_weekly_cases_per_million[39:43],
type = "l",
xlab = "Date",
ylab = "Uganda Prediction vs. Acutal: Null Model")
lines(covid_uganda$week[39:43], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE South Africa Null
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 391.0393
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 614.8072
regression_model = lm(adjusted_weekly_cases_per_million~as.numeric(vaccination_policy), data = covid_uganda[1:38,])
summary(regression_model)
##
## Call:
## lm(formula = adjusted_weekly_cases_per_million ~ as.numeric(vaccination_policy),
## data = covid_uganda[1:38, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -46.48 -30.68 -15.23 22.22 131.14
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -119.722 15.733 -7.610 5.3e-09 ***
## as.numeric(vaccination_policy) 13.072 5.526 2.365 0.0235 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 44.72 on 36 degrees of freedom
## Multiple R-squared: 0.1345, Adjusted R-squared: 0.1105
## F-statistic: 5.596 on 1 and 36 DF, p-value: 0.02351
anova(regression_model)
## Analysis of Variance Table
##
## Response: adjusted_weekly_cases_per_million
## Df Sum Sq Mean Sq F value Pr(>F)
## as.numeric(vaccination_policy) 1 11188 11188.5 5.5955 0.02351 *
## Residuals 36 71983 1999.5
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(2,0,0) with zero mean
##
## Coefficients:
## ar1 ar2
## 1.2597 -0.4583
## s.e. 0.1403 0.1415
##
## sigma^2 estimated as 393.1: log likelihood=-167.32
## AIC=340.64 AICc=341.35 BIC=345.56
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.523593 19.29831 12.53067 236.7103 253.1082 0.8790713
## ACF1
## Training set -0.01965143
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,0) with zero mean
## Q* = 2.0956, df = 6, p-value = 0.9107
##
## Model df: 2. Total lags used: 8
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = covid_uganda[39:43,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),covid_uganda$adjusted_weekly_cases_per_million[1:43])
colnames(compare_table) = c("predict","actual")
ggplot(data = covid_uganda[1:43,]) +
geom_line(aes(x=week, y=adjusted_weekly_cases_per_million, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "Uganda Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Full Model, ARIMA(2,0,0)")

plot(covid_uganda$week[39:43], # Draw first time series
covid_uganda$adjusted_weekly_cases_per_million[39:43],
type = "l",
xlab = "Date",
ylab = "Uganda Prediction vs. Acutal: Null Model")
lines(covid_uganda$week[39:43], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE South Africa Null
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 372.425
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 2288.198
#United Kingdom Example on aggregate weekly
ts_covid_uk = ts(covid_uk$residuals,frequency = 7)
ts_covid_uk_2 = ts(colSums(matrix(ts_covid_uk, nrow=7)))
## Warning in matrix(ts_covid_uk, nrow = 7): data length [415] is not a sub-
## multiple or multiple of the number of rows [7]
vaccine_policy = colSums(matrix(as.numeric(covid_uk$vaccination_policy), nrow=7))
## Warning in matrix(as.numeric(covid_uk$vaccination_policy), nrow = 7): data
## length [415] is not a sub-multiple or multiple of the number of rows [7]
new_ts_covid_uk = data.frame(1:60,ts_covid_uk_2,vaccine_policy)
names(new_ts_covid_uk) = c("week","weekly_aggregated_residuals","vaccination_policy")
plot.ts(ts_covid_uk_2)

regression_model = lm(weekly_aggregated_residuals~1, data = new_ts_covid_uk[1:55,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ 1, data = new_ts_covid_uk[1:55,
## ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -2579.4 -1632.6 376.3 1036.9 4272.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -211.2 225.2 -0.938 0.352
##
## Residual standard error: 1670 on 54 degrees of freedom
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## Residuals 54 150595053 2788797
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(2,0,0) with zero mean
##
## Coefficients:
## ar1 ar2
## 1.4156 -0.5145
## s.e. 0.1123 0.1121
##
## sigma^2 estimated as 248034: log likelihood=-419.95
## AIC=845.9 AICc=846.37 BIC=851.92
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 13.37828 488.8913 351.8551 -65.33358 112.3605 0.8099452 0.03082382
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,0) with zero mean
## Q* = 8.4026, df = 8, p-value = 0.3952
##
## Model df: 2. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_uk[56:60,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_uk$weekly_aggregated_residuals[1:60])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_uk[1:60,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "UK Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Null Model, ARIMA(2,0,0)")

plot(new_ts_covid_uk$week[56:60], # Draw first time series
new_ts_covid_uk$weekly_aggregated_residuals[56:60],
type = "l",
xlab = "Date",
ylab = "United Kingdom Prediction vs. Acutal: Null Model")
lines(new_ts_covid_uk$week[56:60], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE UK Null
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 239014.7
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 790416
regression_model = lm(weekly_aggregated_residuals~vaccination_policy, data = new_ts_covid_uk[1:55,])
summary(regression_model)
##
## Call:
## lm(formula = weekly_aggregated_residuals ~ vaccination_policy,
## data = new_ts_covid_uk[1:55, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -2587.1 -1511.6 52.3 1008.9 4265.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 310.95 547.88 0.568 0.573
## vaccination_policy -18.37 17.58 -1.045 0.301
##
## Residual standard error: 1669 on 53 degrees of freedom
## Multiple R-squared: 0.0202, Adjusted R-squared: 0.001709
## F-statistic: 1.092 on 1 and 53 DF, p-value: 0.3007
anova(regression_model)
## Analysis of Variance Table
##
## Response: weekly_aggregated_residuals
## Df Sum Sq Mean Sq F value Pr(>F)
## vaccination_policy 1 3041426 3041426 1.0925 0.3007
## Residuals 53 147553627 2784031
error = residuals(regression_model)
ts_error = ts(error)
#ARIMA on error
error_model = auto.arima(ts_error)
fitted_error = fitted(error_model)
summary(error_model)
## Series: ts_error
## ARIMA(2,0,0) with zero mean
##
## Coefficients:
## ar1 ar2
## 1.4245 -0.5277
## s.e. 0.1113 0.1115
##
## sigma^2 estimated as 252383: log likelihood=-420.43
## AIC=846.86 AICc=847.33 BIC=852.89
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 16.17417 493.1582 359.2741 4.718285 59.38791 0.8055654 0.02330247
checkresiduals(error_model)

##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,0) with zero mean
## Q* = 8.123, df = 8, p-value = 0.4215
##
## Model df: 2. Total lags used: 10
#goodness of fit
regression_prediction = predict(regression_model)
final_prediction = regression_prediction + fitted_error
regression_prediction = predict(regression_model, newdata = new_ts_covid_uk[56:60,])
error_forecast = predict(error_model, n.ahead = 5)$pred
final_prediction_2 = regression_prediction + error_forecast
#compare prediction and actual
compare_table = data.frame(c(final_prediction,final_prediction_2),new_ts_covid_uk$weekly_aggregated_residuals[1:60])
colnames(compare_table) = c("predict","actual")
ggplot(data = new_ts_covid_uk[1:60,]) +
geom_line(aes(x=week, y=weekly_aggregated_residuals, color = "black")) +
geom_line(aes(x=week, y=compare_table$predict, color = "red"))+
labs(x = "Week",
y = "UK Prediction vs. Acutal")+
scale_color_manual(name = c("color"),values = c("black","red"),labels = c("Actual","Prediction"))+
ggtitle("Full Model, ARIMA(2,0,0)")

plot(new_ts_covid_uk$week[56:60], # Draw first time series
new_ts_covid_uk$weekly_aggregated_residuals[56:60],
type = "l",
xlab = "Date",
ylab = "United Kingdom Prediction vs. Acutal: Full Model")
lines(new_ts_covid_uk$week[56:60], # Draw second time series
final_prediction_2,
type = "l",
col = "red")
legend(x = "topright",legend=c("actual", "prediction"),
col=c("black", "red"), lty=1, cex=0.5)

#Training MSE UK Full
SSE_train = sum((final_prediction - compare_table$actual[1:length(final_prediction)])^2)
MSE_train = SSE_train/length(final_prediction)
MSE_train
## [1] 243205
#Testing MSE
SSE_test = sum((final_prediction_2 - compare_table$actual[length(final_prediction)+1:length(final_prediction_2)])^2)
MSE_test = SSE_test/length(final_prediction_2)
MSE_test
## [1] 1020226